{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "initial_id",
   "metadata": {
    "collapsed": true,
    "ExecuteTime": {
     "end_time": "2023-12-26T06:37:04.752922800Z",
     "start_time": "2023-12-26T06:37:04.717259200Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": "         title                           address              house_type  \\\n0         凌云华府        [ 罗湖 笋岗 ] 笋岗街道红岭北路与泥岗东路交界处            建筑面积：630000㎡   \n1       百合世纪广场              [ 龙岗 布吉 ] 锦龙路与翔鸽路交叉口  3室|4室|5室|建筑面积：101-174㎡   \n2        金地明峰府              [ 光明 马田 ] 地铁6号线薯田埔站旁      3室|4室|建筑面积：81-116㎡   \n3         卓越九珑              [ 南山 南头 ] 大新地铁站约300米      3室|4室|建筑面积：83-119㎡   \n4        华侨城宝辰               [ 宝安 宝安中心区 ] 香湾一路3号           建筑面积：220-374㎡   \n..         ...                               ...                     ...   \n160  满京华SOHO藝峦  [ 宝安 西乡 ] 宝安广深公路（107国道）与西乡大道交...             建筑面积：77203㎡   \n161  坪山招商花园城商铺           [ 坪山 坪山街道 ] 坪山管委会及中心公园旁            建筑面积：400000㎡   \n162       华廷悦府              [ 深圳周边 惠州 ] 陈江大道南19号      2室|3室|建筑面积：86-142㎡   \n163      冠科泊樾湾            [ 深圳周边 东莞 ] 太安路虎门段289号      3室|4室|建筑面积：83-125㎡   \n164    华润清溪润溪山              [ 深圳周边 东莞 ] 清溪大道东40米        别墅|建筑面积：143-170㎡   \n\n    tags_wrap_1               tags_wrap_2    price  \n0         待售|住宅    品牌开发商|配套纯熟|公园|便利店|生态绿地  81000.0  \n1         待售|住宅      学校|配套纯熟|多轨交|购物中心|商业街      NaN  \n2         在售|住宅       学校|银行|高绿化率|大型社区|大户型  41500.0  \n3         待售|住宅     品牌开发商|轨交房|学校|配套纯熟|商业街  98328.0  \n4      在售|标准写字楼  自带商业|核心CBD|地铁沿线|配套纯熟|便利店  86000.0  \n..          ...                       ...      ...  \n160    售罄|标准写字楼         购物中心|商业街|医院|学校|银行  36250.0  \n161          售罄         购物中心|商业街|公园|医院|学校  65130.0  \n162       售罄|住宅        VR看房|商业街|医院|银行|便利店  13158.0  \n163       售罄|住宅     现房|滨海湾新区|装修交付|大户型|商业街  24000.0  \n164       售罄|别墅    装修交付|南北通透|厨卫全明|大户型|大阳台  59987.0  \n\n[1305 rows x 6 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>title</th>\n      <th>address</th>\n      <th>house_type</th>\n      <th>tags_wrap_1</th>\n      <th>tags_wrap_2</th>\n      <th>price</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>凌云华府</td>\n      <td>[ 罗湖 笋岗 ] 笋岗街道红岭北路与泥岗东路交界处</td>\n      <td>建筑面积：630000㎡</td>\n      <td>待售|住宅</td>\n      <td>品牌开发商|配套纯熟|公园|便利店|生态绿地</td>\n      <td>81000.0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>百合世纪广场</td>\n      <td>[ 龙岗 布吉 ] 锦龙路与翔鸽路交叉口</td>\n      <td>3室|4室|5室|建筑面积：101-174㎡</td>\n      <td>待售|住宅</td>\n      <td>学校|配套纯熟|多轨交|购物中心|商业街</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>金地明峰府</td>\n      <td>[ 光明 马田 ] 地铁6号线薯田埔站旁</td>\n      <td>3室|4室|建筑面积：81-116㎡</td>\n      <td>在售|住宅</td>\n      <td>学校|银行|高绿化率|大型社区|大户型</td>\n      <td>41500.0</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>卓越九珑</td>\n      <td>[ 南山 南头 ] 大新地铁站约300米</td>\n      <td>3室|4室|建筑面积：83-119㎡</td>\n      <td>待售|住宅</td>\n      <td>品牌开发商|轨交房|学校|配套纯熟|商业街</td>\n      <td>98328.0</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>华侨城宝辰</td>\n      <td>[ 宝安 宝安中心区 ] 香湾一路3号</td>\n      <td>建筑面积：220-374㎡</td>\n      <td>在售|标准写字楼</td>\n      <td>自带商业|核心CBD|地铁沿线|配套纯熟|便利店</td>\n      <td>86000.0</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>160</th>\n      <td>满京华SOHO藝峦</td>\n      <td>[ 宝安 西乡 ] 宝安广深公路（107国道）与西乡大道交...</td>\n      <td>建筑面积：77203㎡</td>\n      <td>售罄|标准写字楼</td>\n      <td>购物中心|商业街|医院|学校|银行</td>\n      <td>36250.0</td>\n    </tr>\n    <tr>\n      <th>161</th>\n      <td>坪山招商花园城商铺</td>\n      <td>[ 坪山 坪山街道 ] 坪山管委会及中心公园旁</td>\n      <td>建筑面积：400000㎡</td>\n      <td>售罄</td>\n      <td>购物中心|商业街|公园|医院|学校</td>\n      <td>65130.0</td>\n    </tr>\n    <tr>\n      <th>162</th>\n      <td>华廷悦府</td>\n      <td>[ 深圳周边 惠州 ] 陈江大道南19号</td>\n      <td>2室|3室|建筑面积：86-142㎡</td>\n      <td>售罄|住宅</td>\n      <td>VR看房|商业街|医院|银行|便利店</td>\n      <td>13158.0</td>\n    </tr>\n    <tr>\n      <th>163</th>\n      <td>冠科泊樾湾</td>\n      <td>[ 深圳周边 东莞 ] 太安路虎门段289号</td>\n      <td>3室|4室|建筑面积：83-125㎡</td>\n      <td>售罄|住宅</td>\n      <td>现房|滨海湾新区|装修交付|大户型|商业街</td>\n      <td>24000.0</td>\n    </tr>\n    <tr>\n      <th>164</th>\n      <td>华润清溪润溪山</td>\n      <td>[ 深圳周边 东莞 ] 清溪大道东40米</td>\n      <td>别墅|建筑面积：143-170㎡</td>\n      <td>售罄|别墅</td>\n      <td>装修交付|南北通透|厨卫全明|大户型|大阳台</td>\n      <td>59987.0</td>\n    </tr>\n  </tbody>\n</table>\n<p>1305 rows × 6 columns</p>\n</div>"
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import re\n",
    "\n",
    "df_1 = pd.read_csv('../static/data/info_1.csv')\n",
    "df_2 = pd.read_csv('../static/data/info_2.csv')\n",
    "df_3 = pd.read_csv('../static/data/info_3.csv')\n",
    "df_4 = pd.read_csv('../static/data/info_4.csv')\n",
    "df_5 = pd.read_csv('../static/data/info_5.csv')\n",
    "df = pd.concat([df_1,df_2,df_3,df_4,df_5],axis=0)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "outputs": [],
   "source": [
    "df['address'] = df['address'].apply(lambda x:x.replace(' ',''))\n",
    "df['region'] = df['address'].apply(lambda x:re.findall('\\[(.*?)\\]',x)[0][:2]+'区')\n",
    "df['sale_status'] = df['tags_wrap_1'].apply(lambda x:x.split('|')[0] if len(x.split('|'))==2 else '无')\n",
    "df['dwelling_type'] = df['tags_wrap_1'].apply(lambda x:x.split('|')[1] if len(x.split('|'))==2 else '无')\n",
    "df['house_type'] = df['house_type'].apply(lambda x:'' if pd.isnull(x) else x)\n",
    "df['unit_type'] = df['house_type'].apply(lambda x:'|'.join(re.findall('\\d室',x)))\n",
    "df['area'] = df['house_type'].apply(lambda x:''.join(re.findall('建筑面积：(.*?)㎡',x)))\n",
    "df['unit_type'] = df['unit_type'].apply(lambda x:'无' if x == '' else x)\n",
    "df['price'] = df['price'].apply(lambda x:0 if pd.isnull(x) else x)\n",
    "df.rename({'tags_wrap_2':'tags'},axis=1,inplace=True)\n",
    "df = df[['title','address','tags','price','region','sale_status','dwelling_type','unit_type','area']]\n",
    "df.to_csv('../static/data/info_pre.csv',index=False)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-26T06:37:06.539942700Z",
     "start_time": "2023-12-26T06:37:06.509287900Z"
    }
   },
   "id": "f16b6b966d5b4fa7"
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
